blast <- read.csv("cannsyn_blast.csv")
blast[1:3,]
## qseqid qlen sseqid slen qstart qend sstart send evalue
## 1 LY658671.1 1639 AH3Ma.chr7 66405468 4 1639 13455428 52948405 0
## 2 LY658671.1 1639 AH3Ma.chr7 66405468 4 1639 13367548 53036285 0
## 3 LY658671.1 1639 AH3Ma.chr7 66405468 4 1639 13306820 53097013 0
## bitscore score length pident nident mismatch positive gapopen gaps ppos
## 1 3016 1633 1636 99.939 1635 1 1635 0 0 99.94
## 2 3011 1630 1636 99.878 1634 2 1634 0 0 99.88
## 3 3011 1630 1636 99.878 1634 2 1634 0 0 99.88
## sstrand
## 1 minus
## 2 minus
## 3 minus
table(blast$qseqid)
##
## AB212829.1 AB212830.1 AB292682.1 AB292683.1 AB292684.1 LY658671.1
## 792 792 283 785 753 792
cbdas <- blast[blast$qseqid == "AB292682.1", ]
#cbdas <- cbdas[cbdas$pident >= 98, ]
cbdas$sseqid <- sub("\\.chr7", "", cbdas$sseqid)
table(cbdas$sseqid)
##
## AH3Ma AH3Mb BCMa BCMb BOAXa BOAXb COFBa COFBb COSVa COSVb DPFBa
## 6 5 5 5 4 2 2 5 5 4 4
## DPFBb DPSVa DPSVb EH23a EH23b FCS1a FCS1b GERv1a GERv1b GRMa GRMb
## 2 4 4 3 4 2 4 5 4 4 5
## H3S1a H3S1b H3S7a H3S7b KCDv1a KCDv1b KOMPa KOMPb MBFBa MBFBb MBSVa
## 4 6 4 2 5 5 5 5 2 2 2
## MBSVb MM3v1a MM3v1b NLv1a NLv1b OFBa OFBb PPFBa PPFBb S8Ha S8Hb
## 4 1 1 2 2 6 2 2 2 4 3
## SAN2a SAN2b SDFBa SDFBb SHH5a SHH5b SKFBa SKFBb SN1v3a SN1v3b SODLa
## 4 4 3 2 4 3 6 2 4 4 5
## SODLb STHa STHb SVA12a SVA12b SVA6a SVA6b SZFBa SZFBb TKFBa TKFBb
## 2 1 3 4 4 4 4 6 2 6 2
## TWFBa TWFBb TWSVa TWSVb UFBa UFBb WCFBa WCFBb WHWa WHWb YMv2a
## 3 2 3 4 5 2 2 2 6 6 5
## YMv2b
## 5
tmp <- read.csv("cbdas_ncbi.csv")
#tmp <- tmp[tmp$pident >= 98, ]
tmp[1:3, ]
## qseqid qlen sseqid slen qstart qend sstart send evalue bitscore
## 1 AB292682.1 1635 Abacus 63009714 1 1635 52216283 52217917 0 3020
## 2 AB292682.1 1635 Abacus 63009714 1 1635 51977965 51979602 0 2141
## 3 AB292682.1 1635 Abacus 63009714 1 1635 52071820 52073457 0 2130
## score length pident nident mismatch positive gapopen gaps ppos sstrand
## 1 1635 1635 100.000 1635 0 1635 0 0 100.00 plus
## 2 1159 1638 90.293 1479 156 1479 2 3 90.29 plus
## 3 1153 1638 90.171 1477 158 1477 2 3 90.17 plus
#table(tmp$sseqid)
#tmp[tmp$sseqid == "Cannbio-2", ]
#tmp[tmp$sseqid == "jl", ]
cbdas <- rbind(cbdas, tmp)
cbdas <- cbdas[cbdas$pident >= 98, ]
table(cbdas$sseqid)
##
## Abacus BCMa BCMb BOAXa BOAXb CBDRx COSVb DPSVb
## 1 1 1 1 1 1 1 1
## EH23b Finola GERv1a GERv1b GRMa GRMb JL_Mother KCDv1a
## 1 1 1 1 1 1 1 1
## KCDv1b KOMPa KOMPb MBSVb MM3v1a MM3v1b S8Ha SHH5a
## 1 1 1 1 1 1 1 1
## STHa SVA12a SVA12b SVA6a SVA6b TWSVb YMv2a YMv2b
## 1 1 1 1 1 1 1 1
busc <- read.csv("BUSCOchrom7.csv.gz")
busc[1:3, ]
## GRMb MM3v1b SKFBa OFBa SZFBa AH3Ma
## BUSCO_ID=3166at71240 65286253 55579498 66713137 66317410 67061999 66051080
## BUSCO_ID=71319at71240 65023896 55317742 66443801 66039010 66799898 65772685
## BUSCO_ID=91349at71240 64908056 55208151 66346418 65939846 66684053 65673517
## H3S1b TKFBa GERv1a NLv1a KCDv1a YMv2b
## BUSCO_ID=3166at71240 65770433 67488932 71114214 70547443 68758573 NA
## BUSCO_ID=71319at71240 65490615 67210527 70812690 70285090 68465576 NA
## BUSCO_ID=91349at71240 65375514 67111361 70705281 70169250 68367101 NA
## DPFBa SVA6b SVA6a KCDv1b TWSVa TWFBa
## BUSCO_ID=3166at71240 68355999 57025633 58432902 65888651 68210096 67991257
## BUSCO_ID=71319at71240 68088363 57289169 58696438 65598806 67914651 67695812
## BUSCO_ID=91349at71240 67980498 57392508 58799777 65500331 67799094 67580260
## H3S7a FCS1b COFBb SDFBa SODLb SAN2a
## BUSCO_ID=3166at71240 66742958 67168769 67300029 67276384 70372662 66435220
## BUSCO_ID=71319at71240 66463142 66895217 67034789 67014022 70097785 66161668
## BUSCO_ID=91349at71240 66348043 66780115 66908047 66898181 69979250 66046566
## DPSVa YMv2a SAN2b MM3v1a COSVa SVA12a
## BUSCO_ID=3166at71240 68413070 NA 66271111 62100268 68014952 59529159
## BUSCO_ID=71319at71240 68145440 NA 65985206 61810612 67749716 59792695
## BUSCO_ID=91349at71240 68037572 NA 65870104 61681487 67622979 59896034
## SZFBb SODLa AH3Mb WHWb WHWa EH23a
## BUSCO_ID=3166at71240 65451624 66312621 66119675 65986693 65652068 65404425
## BUSCO_ID=71319at71240 65172417 66017291 65841280 65723052 65377397 65125238
## BUSCO_ID=91349at71240 65040388 65926868 65742112 65604807 65269533 64993212
## FCS1a STHb PPFBa STHa S8Hb COFBa
## BUSCO_ID=3166at71240 66154507 65152589 63952183 66153069 64094844 63557806
## BUSCO_ID=71319at71240 65886892 64864912 63662262 65865293 63815658 63295236
## BUSCO_ID=91349at71240 65779027 64769916 63563096 65736164 63683635 63194828
## SKFBb KOMPb NLv1b SHH5b PPFBb UFBb
## BUSCO_ID=3166at71240 65578794 65476390 66486958 64526525 63914573 64982578
## BUSCO_ID=71319at71240 65299605 65178042 66224605 64247339 63635381 64720010
## BUSCO_ID=91349at71240 65167576 65076688 66108765 64113332 63503352 64619600
## WCFBb DPFBb OFBb BCMa H3S7b GERv1b
## BUSCO_ID=3166at71240 64346173 64872297 64884477 65092030 64090130 NA
## BUSCO_ID=71319at71240 64083603 64609728 64621907 64785483 63794690 NA
## BUSCO_ID=91349at71240 63983194 64509319 64521499 64669126 63679140 NA
## SDFBb MBFBb TWFBb TKFBb S8Ha BOAXb
## BUSCO_ID=3166at71240 65193212 64544135 64941741 64958896 60849792 64705418
## BUSCO_ID=71319at71240 64914023 64281566 64662552 64679707 60587432 64416321
## BUSCO_ID=91349at71240 64781995 64181156 64530519 64547675 60471592 64290042
## KOMPa H3S1a UFBa SHH5a BCMb MBFBa
## BUSCO_ID=3166at71240 59321660 64881477 64662190 63812134 63953562 64585476
## BUSCO_ID=71319at71240 64836183 64603086 64391554 63549769 63674897 64307082
## BUSCO_ID=91349at71240 64727827 64503922 64287856 63433924 63555982 64207915
## BOAXa WCFBa MBSVa GRMa MBSVb EH23b
## BUSCO_ID=3166at71240 63842027 65540064 66565083 61742407 63031272 62296366
## BUSCO_ID=71319at71240 63558199 65261666 66286689 61480050 63294809 62559906
## BUSCO_ID=91349at71240 63452011 65162501 66187525 61364210 63398147 62663245
## SVA12b DPSVb TWSVb COSVb CBDRX.CBDRX.chr7
## BUSCO_ID=3166at71240 61351590 63314921 61879113 63107633 96875
## BUSCO_ID=71319at71240 61615126 63578455 62142649 63371167 359821
## BUSCO_ID=91349at71240 61718465 63681794 62245985 63474504 463043
## CBDRx.NC_044378.1 ABAC.ABAC.chr7 Abacus.CM046076.1
## BUSCO_ID=3166at71240 96895 124294 124314
## BUSCO_ID=71319at71240 359829 395893 395901
## BUSCO_ID=91349at71240 463090 502517 502564
## FIN.FIN.chr7 Finola.CM011610.1 PK.PK.chr7
## BUSCO_ID=3166at71240 178504 178520 NA
## BUSCO_ID=71319at71240 482401 487657 NA
## BUSCO_ID=91349at71240 NA NA NA
## Purple_Kush.CM010797.2 Cannbio.2.CM028020.1
## BUSCO_ID=3166at71240 NA NA
## BUSCO_ID=71319at71240 NA 373985
## BUSCO_ID=91349at71240 NA NA
## jl_Kyirong.CM022973.1
## BUSCO_ID=3166at71240 NA
## BUSCO_ID=71319at71240 NA
## BUSCO_ID=91349at71240 NA
# busc <- busc[, grep("CBDRx.NC_044378.1|Abacus.CM046076.1|Finola.CM011610.1|Purple_Kush.CM010797.2", colnames(busc), invert = TRUE)]
# colnames(busc)[ colnames(busc) == "CBDRX.CBDRX.chr7"] <- "CBDRx"
# colnames(busc)[ colnames(busc) == "ABAC.ABAC.chr7"] <- "Abacus"
# colnames(busc)[ colnames(busc) == "FIN.FIN.chr7"] <- "Finola"
# colnames(busc)[ colnames(busc) == "PK.PK.chr7"] <- "Purple Kush"
# colnames(busc)[ colnames(busc) == "Cannbio.2.CM028020.1"] <- "Cannbio-2"
# colnames(busc)[ colnames(busc) == "jl_Kyirong.CM022973.1"] <- "jl_Kyirong"
#busc <- busc[, grep("CBDRx.NC_044378.1|Abacus.CM046076.1|Finola.CM011610.1|Purple_Kush.CM010797.2", colnames(busc), invert = TRUE)]
busc <- busc[, grep("CBDRX.CBDRX.chr7|ABAC.ABAC.chr7|FIN.FIN.chr7|PK.PK.chr7", colnames(busc), invert = TRUE)]
colnames(busc)[ colnames(busc) == "CBDRx.NC_044378.1"] <- "CBDRx"
colnames(busc)[ colnames(busc) == "Abacus.CM046076.1"] <- "Abacus"
colnames(busc)[ colnames(busc) == "Finola.CM011610.1"] <- "Finola"
colnames(busc)[ colnames(busc) == "Purple_Kush.CM010797.2"] <- "Purple Kush"
colnames(busc)[ colnames(busc) == "Cannbio.2.CM028020.1"] <- "Cannbio-2"
colnames(busc)[ colnames(busc) == "jl_Kyirong.CM022973.1"] <- "jl_Kyirong"
busc[1:3, 1:8]
## GRMb MM3v1b SKFBa OFBa SZFBa AH3Ma
## BUSCO_ID=3166at71240 65286253 55579498 66713137 66317410 67061999 66051080
## BUSCO_ID=71319at71240 65023896 55317742 66443801 66039010 66799898 65772685
## BUSCO_ID=91349at71240 64908056 55208151 66346418 65939846 66684053 65673517
## H3S1b TKFBa
## BUSCO_ID=3166at71240 65770433 67488932
## BUSCO_ID=71319at71240 65490615 67210527
## BUSCO_ID=91349at71240 65375514 67111361
busc[1:3, (ncol(busc)-4):ncol(busc)]
## Abacus Finola Purple Kush Cannbio-2 jl_Kyirong
## BUSCO_ID=3166at71240 124314 178520 NA NA NA
## BUSCO_ID=71319at71240 395901 487657 NA 373985 NA
## BUSCO_ID=91349at71240 502564 NA NA NA NA
library(BUSCOplot)
library(ggplot2)
class(unlist(busc["BUSCO_ID=100415at71240", ]))
## [1] "integer"
#busc <- busc[ , 60:ncol(busc)]
colnames(busc)
## [1] "GRMb" "MM3v1b" "SKFBa" "OFBa" "SZFBa"
## [6] "AH3Ma" "H3S1b" "TKFBa" "GERv1a" "NLv1a"
## [11] "KCDv1a" "YMv2b" "DPFBa" "SVA6b" "SVA6a"
## [16] "KCDv1b" "TWSVa" "TWFBa" "H3S7a" "FCS1b"
## [21] "COFBb" "SDFBa" "SODLb" "SAN2a" "DPSVa"
## [26] "YMv2a" "SAN2b" "MM3v1a" "COSVa" "SVA12a"
## [31] "SZFBb" "SODLa" "AH3Mb" "WHWb" "WHWa"
## [36] "EH23a" "FCS1a" "STHb" "PPFBa" "STHa"
## [41] "S8Hb" "COFBa" "SKFBb" "KOMPb" "NLv1b"
## [46] "SHH5b" "PPFBb" "UFBb" "WCFBb" "DPFBb"
## [51] "OFBb" "BCMa" "H3S7b" "GERv1b" "SDFBb"
## [56] "MBFBb" "TWFBb" "TKFBb" "S8Ha" "BOAXb"
## [61] "KOMPa" "H3S1a" "UFBa" "SHH5a" "BCMb"
## [66] "MBFBa" "BOAXa" "WCFBa" "MBSVa" "GRMa"
## [71] "MBSVb" "EH23b" "SVA12b" "DPSVb" "TWSVb"
## [76] "COSVb" "CBDRx" "Abacus" "Finola" "Purple Kush"
## [81] "Cannbio-2" "jl_Kyirong"
# LETTERS[1:10] %in% LETTERS[3:4]
#colnames(busc)[ !colnames(busc) %in% cbdas$sseqid ]
busc <- busc[, colnames(busc) %in% cbdas$sseqid]
#colnames(busc)
#names(busc) %in% cbdas$sseqid
cbdas <- cbdas[grep("JL_Mother", cbdas$sseqid, invert = TRUE), ]
myx <- factor(cbdas$sseqid, levels = colnames(busc))
p <- gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2)
## Warning in gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2):
## Check for BUSCO table disabled, the user is responsible for results!
p
p <- p + annotate( geom = "line", x = as.numeric(myx), y = cbdas$sstart, linewidth = 1.2, color = "#1E90FF")
p <- p + annotate( geom = "point", x = myx, y = cbdas$sstart, shape = 24, bg = "#1E90FF", size = 4)
p
# ggsave(filename = "ggbusco_lineplot_chrom7_public.png",
# device = "png", width = 6.5, height = 4.5, units = "in", dpi = 300)
busc <- busc[ , sort.int(colnames(busc) ,index.return = TRUE)$ix]
busc <- cbind(busc[ , grep("CBDRx|Abacus|Finola", colnames(busc), invert = TRUE)], busc[ , c("CBDRx", "Abacus", "Finola")])
myx <- factor(cbdas$sseqid, levels = colnames(busc))
p <- gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2)
## Warning in gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2):
## Check for BUSCO table disabled, the user is responsible for results!
p
p <- p + annotate( geom = "line", x = as.numeric(myx), y = cbdas$sstart, linewidth = 1.2, color = "#1E90FF")
p <- p + annotate( geom = "point", x = myx, y = cbdas$sstart, shape = 24, bg = "#1E90FF", size = 4)
p